# Start with a clean workspace
rm(list = ls())

# set working directory
setwd("Replication_Data")
# Function to load packages
loadPkg=function(toLoad){
       for(lib in toLoad){
              if(! lib %in% installed.packages()[,1])
              {install.packages(lib, repos='http://cran.rstudio.com/')}
              suppressMessages( library(lib, character.only=TRUE))}}

# Load libraries
packs=c("maps", "rworldmap", "maptools", "sp", "spdep", "gstat", "MatchIt","cem",
        "spatstat", "coefplot", "countrycode",  "cshapes", "arm", "stargazer","scales",
        "texreg", "reshape2","ggplot2", 'foreign', 'car', 'lme4', 'dplyr', "ggmap","ggrepel",
        "plotROC","pROC","purrr","tibble","magrittr","ROCR","caTools","xtable","rgdal",
        "lattice","pscl","MASS")
loadPkg(packs)

# set a ggplot theme for all figures
theme_set(theme_bw())

# load the function for coefficient plot
source("coefficientplot.R")
####load data
load("data/data.RData")

### Main models
## ------------------------------------------------------------------------

m1.2 <-  bayesglm(TP_mic_yes2 ~ neighbor + invest_pre_total + tp_polity2 +tp_involv_decay + warstate_polity2 + ged_total+ log(duration+1) + 
                         log(warstate_gdppc) + log(warstate_population) + factor(Outcome) +  peace_month +peace_month_sq + peace_month_cub, 
                  data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)

m2.2 <- bayesglm(TP_mic_yes2 ~ neighbor + invest_pre_total + tp_polity2 + tp_involv_decay + warstate_polity2 +ged_total+log(duration+1) + 
                        log(warstate_gdppc) + log( warstate_population)+  neighbor*invest_pre_total +  factor(Outcome) + 
                        peace_month +peace_month_sq + peace_month_cub, 
                 data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)

m3.2 <- bayesglm(TP_mic_yes2 ~ neighbor + invest_pre_total + tp_polity2 + tp_aid + tp_arms + tp_involv_decay +log(duration+1) + 
                        warstate_polity2 + ged_total+ log(warstate_gdppc) + log( warstate_population)+ neighbor*invest_pre_total + 
                        factor(Outcome) + 
                        peace_month + peace_month_sq + peace_month_cub, data = data,
                 family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)

####control for NSA
m4.2 <- bayesglm(TP_mic_yes2 ~ NSA_tp + invest_pre_total +  tp_involv_decay + warstate_polity2 + ged_total+ log(warstate_gdppc) + log(duration+1) + 
                        log(warstate_population) + factor(Outcome) +  peace_month +peace_month_sq + peace_month_cub, 
                 data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)

m5.2 <- bayesglm(TP_mic_yes2 ~ NSA_tp + invest_pre_total +  tp_involv_decay + warstate_polity2 + ged_total+ log(warstate_gdppc) +log(duration+1) + 
                        log( warstate_population) +  NSA_tp*invest_pre_total + factor(Outcome) + peace_month +peace_month_sq + peace_month_cub, 
                 data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)


screenreg(list(m1.2, m2.2, m3.2, m4.2, m5.2))

### ##### Online Appendix Table A2: 
stage1_names3  <-  c("Intercept","TP Neighbouring","TP Wartime MICs(count)", "TP Polity2", "Postwar MIC(decay)",
                     "Polity2", "Count of Postwar Violence", "Conflict Duration (log)", "GDP per Capita (log)","Population (log)",
                     "Ceasefire", "Government Victory", "Rebel Victory", "Low Activity",
                     "Peace Months","Peace Months$^2$", "Peace Months$^3$",
                     "TP Neighbouring*TP Wartime MICs(count)",
                     "TP Aid", "TP Arms Transfer", "Non-state TP(=1)", "Non-state TP(=1)*TP Wartime MICs(count)")

screenreg(list(m1.2, m2.2, m3.2, m4.2, m5.2),stars = c(0.001, 0.01, 0.05, 0.1),custom.coef.names = stage1_names3)

texreg(list(m1.2, m2.2, m3.2, m4.2, m5.2), file = "Table_A2.tex", single.row=F,
       # custom.model.names =c("Dependent Variable: Post-Conflict Involvement"),
       stars = c(0.001, 0.01, 0.05, 0.1), booktable=TRUE, 
       custom.coef.names = stage1_names3, dcolumn =FALSE, use.packages = TRUE, longtable = FALSE,
       scalebox = 1.0, digits = 3, 
       caption = "Bayesian Logistic Regression on the Post-conflict Involvement",
       label ="bayestab1",
       caption.above = TRUE)


### Figure 3
plot.coef(modResults = list(m1.2, m2.2, m3.2, m4.2, m5.2), data = data, 
          vars = c("neighbor", "NSA_tp", "tp_aid", "tp_arms","invest_pre_total", "tp_involv_decay")) +  
       scale_x_discrete(labels=c("TP Wartime MICs(count)", "TP Neighbouring",
                                 "TP Neighbouring*TP Wartime MICs(count)",
                                 "Non-state TP(=1)","Non-state TP(=1)*TP Wartime MICs(count)",
                                 "TP Aid",  "TP Arms Transfer", "Postwar MIC(decay)"
       ))
ggsave("figures/Figure_3.jpeg", width = 9, height = 6)


#Figure 4

p <- plot_interEffect(ModelResults = m3.2, n.sim = 1000, data = data, 
                      varname1 = "neighbor", varname2 = 'invest_pre_total',val1 = 0, val2 = 20, 
                      label = c('No Neighbouring third-party',"Neighbouring third-party"), xlabs = "Third-party Wartime MICs",
                      ylabs = "Pr(postwar involvement=1)",
                      intervals = 2, facet = F)  
ggsave("figures/Figure_4a.jpeg", plot = p, width = 6, height = 6)

p2 <- plot_interEffect(ModelResults = m5.2, n.sim = 1000, data = data, 
                       varname1 = "NSA_tp", varname2 = 'invest_pre_total',val1 = 0, val2 = 20, 
                       label = c('State third-party',"Non-state third-party"), xlabs = "Third-party Wartime MICs",
                       ylabs = "Pr(postwar involvement=1)",
                       intervals = 2, facet = F)  
ggsave("figures/Figure_4b.jpeg", plot = p2, width = 6, height = 6)


### Figure 5: ROC

###use some basic glm models
roc1 <- glm(TP_mic_yes2 ~ tp_polity2 + warstate_polity2 + factor(Outcome) + 
                   log(warstate_gdppc) + log( warstate_population) +
                   peace_month +peace_month_sq + peace_month_cub, 
            data = data, family = binomial(link = "logit"))
roc2 <- glm(TP_mic_yes2 ~ neighbor +  tp_polity2 + warstate_polity2 + 
                   log(warstate_gdppc) + log( warstate_population) + factor(Outcome)+
                   peace_month +peace_month_sq + peace_month_cub, 
            data = data, family = binomial(link = "logit"))
## use the main model 2
roc3 <-  m3.2

##ROC plot```
pp1 <- as.vector(predict(roc1, type="response"))
Y <- roc1$model$TP_mic_yes2
p.roc1 <- roc(Y,pp1,plot = TRUE, ci=TRUE)

pp2 <- as.vector(predict(roc2, type="response"))
Y <- roc2$model$TP_mic_yes2
p.roc2 <- roc(Y,pp2,plot = TRUE, ci=TRUE)


pp3 <- as.vector(predict(roc3, type ="response"))
Y <- roc3$model$TP_mic_yes2
p.roc3 <- roc(Y,pp3,plot = TRUE, ci=TRUE)

## put together for ggplot
ModelResults = list(roc1,roc2, roc3); linetypes = c("solid", "dotted", "longdash"); interval=0.2


pred_dv =lapply(ModelResults, function(x)
       FUN = predict(x, type = "response"))

Y = lapply(ModelResults, function(x) FUN = x$y)

roc = Map(function(x, y) roc(x,y), Y, pred_dv)

roc_df  = lapply(roc, function(x)
       FUN= data.frame(plotx = x$specificities,
                       ploty = rev(x$sensitivities),
                       name = paste("AUC =",
                                    sprintf("%.3f",x$auc)))) %>%
       map_df(., rbind)

unique(roc_df$name)

roc_df <- roc_df %>% 
       mutate(model = ifelse(name == "AUC = 0.743", "GLM Controls only, AUC = 0.743", name)) %>% 
       mutate(model = ifelse(name == "AUC = 0.871", "GLM Neighbouring TP, AUC = 0.871", model)) %>% 
       mutate(model = ifelse(name == "AUC = 0.978", "Bayesglm full model, AUC = 0.978", model))

p <- ggplot(roc_df, aes(x = plotx, y = ploty, color = model, linetype = model)) +
       geom_line() +
       scale_colour_discrete("") +
       scale_linetype_manual(name = '',
                             values=linetypes) +
       geom_segment(aes(x = 0, y = 0, xend = 1, yend = 1), alpha = 0.5) +
       scale_x_continuous(name = "False Positive Rate", expand = c(0.001,0.001)) +
       scale_y_continuous(name = "True positive rate", expand = c(0.001,0.001)) +
       theme_bw() +
       theme(legend.position=c(.65, 0.2),
             text = element_text(size=16),
             axis.title.y = element_text(margin = margin(1,1,1,1)),
             axis.text = element_text(size=16),
             axis.title=element_text(size=16)) 

ggsave("figures/Figure_5.jpeg", plot = p, width = 6, height = 6)


##### Online Appendix Table A3 and Figure 6


mm3.1 <- bayesglm(TP_mic_yes2 ~ neighbor*peacekeeping_total + tp_polity2 + tp_aid + tp_arms + tp_involv_decay + log(duration+1) + 
                          warstate_polity2 + ged_total+ log(warstate_gdppc) + log( warstate_population)+  factor(Outcome) + 
                          peace_month +peace_month_sq + peace_month_cub, data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)
mm3.2 <- bayesglm(TP_mic_yes2 ~ neighbor*mediation + tp_polity2 + tp_aid + tp_arms + tp_involv_decay +
                          warstate_polity2 + ged_total+ log(warstate_gdppc) + log( warstate_population)+  factor(Outcome) + log(duration+1) + 
                          peace_month +peace_month_sq + peace_month_cub, data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)
mm3.3 <- bayesglm(TP_mic_yes2 ~ neighbor*good_office_total + tp_polity2 + tp_aid + tp_arms + tp_involv_decay +
                          warstate_polity2 + ged_total+ log(warstate_gdppc) + log( warstate_population)+  factor(Outcome) + log(duration+1) + 
                          peace_month +peace_month_sq + peace_month_cub, data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)
mm3.4 <- bayesglm(TP_mic_yes2 ~ NSA_tp*mediation +  tp_involv_decay + warstate_polity2 + ged_total+ log(warstate_gdppc) +log(duration+1) + 
                          log( warstate_population) + factor(Outcome) +  peace_month +peace_month_sq + peace_month_cub, 
                  data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)
mm3.5 <- bayesglm(TP_mic_yes2 ~ NSA_tp*good_office_total +  tp_involv_decay + warstate_polity2 + ged_total+ log(warstate_gdppc) +log(duration+1) + 
                          log( warstate_population) + factor(Outcome) +  peace_month +peace_month_sq + peace_month_cub, 
                  data = data, family = binomial(link = "logit"), prior.scale=2.5, prior.df=1)


# ################################################### Figure 6
plot.coef(modResults = list(mm3.1, mm3.2, mm3.3, mm3.4, mm3.5), data = data,
          vars= c("neighbor", "NSA_tp", "tp_aid", "tp_arms","peacekeeping_total", "tp_involv_decay",
                  "mediation", "good_office_total")) + 
        scale_x_discrete(labels=c("TP Wartime Good Office(count)", 
                                  "TP Wartime Mediation(count)",
                                  "TP Neighbouring", 
                                  "TP Neighbouring*TP Wartime Good Office(count)",
                                  "TP Neighbouring*TP Wartime Mediation(count)",
                                  "TP Neighbouring*TP Wartime Peacekeeping(count)",
                                  "Non-state TP(=1)",
                                  "Non-state TP(=1)*TP Wartime Good Office(count)",
                                  "Non-state TP(=1)*TP Wartime Mediation(count)",
                                  "TP Wartime Peacekeeping(count)",
                                  "TP Aid", "TP Arms Transfer",  "Postwar MIC(decay)"
        ))
ggsave("figures/Figure_6.jpeg", width = 10, height = 6)

###### Appendix Table A3

stage2_names3  <-  c("Intercept","TP Neighbouring","TP Wartime Peacekeeping(count)","TP Neighbouring*TP Wartime Peacekeeping(count)",
                     "TP Polity2", "TP Aid", "TP Arms Transfer","Postwar MIC(decay)", "Conflict Duration (log)",
                     "Polity2", "Count of Postwar Violence", "GDP per Capita (log)","Population (log)",
                     "Ceasefire", "Government Victory", "Rebel Victory", "Low Activity",
                     "Peace Months","Peace Months$^2$", "Peace Months$^3$",
                     "TP Wartime Mediation (count)", "TP Neighbouring*TP Wartime mediation(count)",
                     "TP Wartime Good office(count)","TP Neighbouring*TP Wartime Good Office(count)",
                     "Non-state TP(=1)", "Non-state TP(=1)*TP Wartime Mediation(count)",
                     "Non-state TP(=1)*TP Wartime Good Office(count)")

texreg(list(mm3.1, mm3.2, mm3.3, mm3.4, mm3.5), file = "Table_A3.tex", single.row=F,
       stars = c(0.001, 0.01, 0.05, 0.1), booktable=TRUE, 
       custom.coef.names = stage2_names3, dcolumn =FALSE, use.packages = TRUE, longtable = FALSE,
       scalebox = 1.0, digits = 3, 
       caption = "Bayesian Logistic Regression on the Post-conflict Involvement (Types of Involvement)",
       label ="bayestab3",
       caption.above = TRUE)

##############################

## Appendix Table A1: Descriptive statistics
###discriptive table
tableA1 <- data[c("TP_mic_yes2","neighbor","invest_pre_total", "tp_polity2", "tp_involv_decay","duration","Outcome",
                "warstate_polity2", "warstate_gdppc","warstate_population","peace_month","tp_aid","tp_arms",
                "NSA_tp", "good_office_total","peacekeeping_total", "ged_total", "type_of_talks2_total")]

tableA1 <- tableA1 %>% 
       dplyr::mutate(Outcome = as.integer(Outcome)) %>% 
       dplyr::mutate(agreement  = ifelse(Outcome==1 & !is.na(Outcome), 1, 0),
                     ceasefire  = ifelse(Outcome==2 & !is.na(Outcome), 1, 0),
                     govwin  = ifelse(Outcome==3 & !is.na(Outcome), 1, 0),
                     rebelvin  = ifelse(Outcome==4 & !is.na(Outcome), 1, 0),
                     lowacti  = ifelse(Outcome==5 & !is.na(Outcome), 1, 0)) %>% 
       select(-Outcome)
tableA1 <- as.data.frame(tableA1)
#stargazer cannot work on dplyr

stargazer(tableA1, 
          title="Descriptive statistics (Third-party Post-conflict Month Level)", label = "Descriptive2",
          covariate.labels=c("Post-Conflict Involvement","TP Neighbouring","TP wartime MICs(count)","TP Polity2","Postwar MIC(decay)", "Conflict duration",
                             "Polity2",  "GDP per capita","Population", "Peace Months",
                             "TP Aid", "TP Arms Transfer", "Non-State TP(=1)", "TP wartime good office(count)", "TP wartime peacekeeping(count)",
                             "Number of Postwar Violence","TP wartime Indirect Talk(count)",
                             "Peace agreement", "Ceasefire", "Government Victory", "Rebel Victory", "Low Activity"),
          type = "latex", digits=1, out="Table_A1.tex")


##### This is for Appendix B

#####
load("data/third_parties_data.RData")

###discriptive table: Table B1 actor level N = 179
stargazer(third_parties[c("post_tp", "wartime_tp",  "neighbors", "aid_country", "arm_third")], 
          title="Descriptive statistics (Third-party level)", label = "Descriptive1",
          covariate.labels=c("Post-Conflict Involvement", "Wartime Involvement Experience", "Neighbors", "Aid Donors", "Arm Supplier"),
          type = "latex", digits=1, out="Table_B1.tex")

## Model: Table B2
m <- glm(post_tp ~ wartime_tp + neighbors + aid_country + arm_third, data =third_parties, family =binomial(link = "logit"))

tp_level_name <- c("Intercept", "Wartime Involvement Experience", "Neighbors", "Aid Donors", "Arm Supplier")
texreg(m, file = "Table_B2.tex", single.row=F,
       custom.model.names =c("Dependent Variable: Post-Conflict Involvement"),
       stars = c(0.001, 0.01, 0.05, 0.1), booktable=TRUE, 
       custom.coef.names = tp_level_name, dcolumn =FALSE, use.packages = TRUE, longtable = FALSE,
       scalebox = 1.0,
       digits = 3, caption = "Logistic Regression on the Post-conflict Involvement (Third-party level)",label ="tab1",
       caption.above = TRUE)


### Table B3: List of third parties
load("data/mic_clean.RData")
name.thirdparty <- as.numeric(as.character(third_parties[,1]))
NSA.name.thirdparty2_code <- name.thirdparty[name.thirdparty>999]

mic_tp <- mic_clean[,c(1,19:39,52)]
NSA.name.thirdparty2 <- c("The Sant' Egidio Community","Vatican","EC/EU","UN","AU","FIDH","NATO",
                          "Jimmy Carter", "Envoy or representative of Julius Nyerere (former Tanzanian President)","Julius Nyerere",
                          "NGO, domestic or local", "Privat person - not in a former official position acting independently","Nelson Mandela (former President of South Africa)",
                          "Envoy or representative of Nelson Mandela","Civil society / community activists","Arab League","South African Development Community",
                          "Christian representatives (catholics, lutherans, russian orthodox etc.)", "ACCORD","ECOWAS",
                          "Amnesty","Commonwealth of Portugese Speaking States","Businessmen","International Francophone Organisation",
                          "The New Partnership for Africa's Development","ICRC","International Alert, The London-based mediation group",
                          "The Commonwealth","Economic Community of West African States Monitoring Group","World Bank","IGADD/IGAD",
                          "CARE International","Abdel Alier","Archbishop of Uganda","US Catholic Church","Henry Dunant Center for Humanitarian Dialogue",
                          "Acholi Religious Leaders Peace Initiative", "EAC", "Prof. Washington Okumu", "Betty Bigombe","Autonomous Government of Southern Sudan",
                          "Pax Christi", "The Uganda Joint Christian Council","Moi Africa Institute","The Elders","Muslim / Islamic Religious Representatives (shiite, sunni etc.)",
                          "Human Rights organisation","Organisation of the Islamic Conference", "The Arab Maghreb Union")
NSA <- data.frame(code = NSA.name.thirdparty2_code,
                  name = NSA.name.thirdparty2)
state_thirdparty2_code <- name.thirdparty[name.thirdparty <1000]
state <- countrycode(state_thirdparty2_code, "cown", "country.name")
state <- data.frame(code = state_thirdparty2_code,
                    name = state)
tp_table <- rbind(NSA, state)
tp_table$code <- as.character(tp_table$code)
## Table B3:
xtable(tp_table)



### Figure 1-2
list.files("Africa_SHP")
africa <- readOGR("Africa_SHP",'Africa') #and the layer is the name of the shapefile (without the .shp extension)

africa.map <- fortify(africa, region="COUNTRY")

africa.map$GWNoLoc <- countrycode(africa.map$id, "country.name", "cown")

###find the sample data
samplecountry <- unique(data$GWNoLoc)

africa.map <- africa.map %>% 
        dplyr::mutate(country = ifelse(GWNoLoc %in% samplecountry, 1, 0 )) 


#Figure 2: sample country our paper

ggplot() + geom_polygon(data = africa.map, 
                        aes(x = long, y = lat, group = group, fill = factor(country)), 
                        color = "black", size = 0.25) + coord_fixed() +
       scale_colour_distiller(type = "seq", palette = 1, na.value = "grey50", guide = "colourbar") +
       scale_fill_manual(values = c("white","grey50"), name= "country in the sample",
                         guide = guide_legend(reverse = TRUE)) + 
       theme_nothing(legend = FALSE)
ggsave("figures/Figure_2.jpeg", width = 6, height =5.5)


##### Figure 1a

##dist. of MICs during/after conflict across country and year
load("data/conflict_month_mic_aggregated.RData")
table(conflict_month_mic_aggregated$PostWar_yes)
cc <- conflict_month_mic_aggregated %>% 
        dplyr::select(conflict_id, StartDate, StartDate2, EpEndDate, GWNoLoc, 
                      month, end, month.id, maxmonth.id, mic_no, PostWar_yes) %>% 
       dplyr::group_by(GWNoLoc,PostWar_yes) %>% dplyr::summarise(No.MIC = sum(mic_no))

cc$country <- countrycode(cc$GWNoLoc, "cown","country.name")
cc <- cc[which(cc$GWNoLoc %in% samplecountry),]
cc <-  cc %>% dplyr::mutate(country = ifelse(GWNoLoc == 490, "DR Congo", country)) %>% 
       dplyr::mutate(country = ifelse(GWNoLoc == 482, "Central African", country))

cc$type <- factor(cc$PostWar_yes, levels = c(0,1), labels = c("wartime", "postwar"))

ggplot(cc, aes(country, No.MIC, fill=type)) + 
       geom_bar(stat="identity", position="identity") +   scale_fill_manual(values = c("grey50","red")) +
       geom_text(data=subset(cc, No.MIC > 0), aes(label=No.MIC),
                 position = position_stack(vjust = 0.5), size = 2) + 
       labs( x ="",  
             y = "", fill = "") + #no legend tile
       theme(axis.text.x = element_text(angle = 45, hjust = 1),
             axis.text = element_text(size=10), 
             axis.title.x = element_text(size=12,vjust=-0.1),
             axis.title.y = element_text(size=12,vjust=+1.1),
             plot.background = element_blank(),
             panel.grid.major = element_blank())
ggsave("figures/Figure_1a.jpeg", width = 10, height =5 )

##### Figure 1b
library(foreign)
my_data <- read.dta("data/paperdata.dta")
##id = event id.
###recode USA as 2

my_data <- my_data %>% 
        dplyr::mutate(thirdparty = replace(thirdparty, thirdparty=="USA",2))

my_data <- my_data %>%
        dplyr::mutate(StartDate = as.Date(StartDate, origin = "1970-01-01"),
                      StartDate2 = as.Date(StartDate2, origin = "1970-01-01"),
                      month = as.Date(month, origin = "1970-01-01"),
                      EpEndDate = as.Date(EpEndDate, origin = "1970-01-01"))

####code diplomacy during civil war (0-1) or after civil war (0-1)
my_data <- my_data%>% 
        dplyr::mutate(WarDiplomacy = ifelse(end_event < EpEndDate, 1, 0))
my_data <- my_data%>% 
        dplyr::mutate(PeaceDiplomacy = ifelse(start_event > EpEndDate, 1 , 0))
table(my_data$PeaceDiplomacy)



##drop  0 or IO
my_data$thirdparty <- as.integer(my_data$thirdparty)
my_data2 <- my_data %>% 
        dplyr::filter(thirdparty > 0) 
##long_w/o IGO

fig_df <- my_data2 %>% 
        dplyr::group_by(thirdparty, WarDiplomacy, PeaceDiplomacy) %>% 
        dplyr::summarise(dup = n(), NoInt=cumsum(dup),
                         wartime = sum(WarDiplomacy),
                         postwar = sum(PeaceDiplomacy))

fig_df <- fig_df %>% 
        dplyr::mutate(continutiy = ifelse(wartime > 0 & postwar > 0, 1, 0))


my_data_states<- my_data%>% 
        dplyr::filter(thirdparty < 1000)
my_data_states$thirdname<- countrycode(my_data_states$thirdparty, "cown","iso3c")

###group by conflict
my_data_states_group <- my_data2%>% 
        dplyr::group_by(conflict_id, StartDate2, thirdparty, WarDiplomacy, PeaceDiplomacy) %>% 
        dplyr::summarise(dup = n(), NoInt=cumsum(dup))

my_data_fig_1 <- my_data_states_group %>%
        dplyr::select(conflict_id, StartDate2, thirdparty, WarDiplomacy, PeaceDiplomacy, dup) %>%
        dplyr::mutate(WarDiplomacy1 = WarDiplomacy*dup,
                      PeaceDiplomacy1= PeaceDiplomacy*dup)%>%
        dplyr::select(-PeaceDiplomacy, -dup,-WarDiplomacy)

my_data_fig_1$WarDiplomacy <- NULL

my_data_fig <- my_data_fig_1%>% 
        dplyr::group_by(conflict_id, StartDate2, thirdparty)%>% 
        dplyr::summarise(WarDiplomacy2= sum(WarDiplomacy1),
                         PeaceDiplomacy2 = sum(PeaceDiplomacy1))
###aggregtate by states for all conflicts
my_data_fig2 <- my_data_fig%>%
        dplyr::group_by(thirdparty)%>%
        dplyr::summarise(WarDiplomacy= sum(WarDiplomacy2),
                         PeaceDiplomacy = sum(PeaceDiplomacy2))

my_data_fig2 <- my_data_fig2 %>% 
        dplyr::mutate(countrycode  = ifelse(thirdparty < 1000, 
                                            countrycode(thirdparty, "cown","iso3c"),
                                            thirdparty))

## add UN and AU
my_data_fig2 <- my_data_fig2 %>% 
        dplyr::mutate(countrycode = ifelse(thirdparty == 11024, "11024 (UN)", countrycode)) %>% 
        dplyr::mutate(countrycode = ifelse(thirdparty == 11000, "11000 (AU)", countrycode)) 


#only label lower density area
# Get the density threshold that captures some percent of the data.
getLevel <- function(x, y, prob = 0.95) {
        kk <- MASS::kde2d(x,y)
        dx <- diff(kk$x[1:2])
        dy <- diff(kk$y[1:2])
        sz <- sort(kk$z)
        c1 <- cumsum(sz) * dx * dy
        threshold <- approx(c1, sz, xout = 1 - prob)$y
        dimnames(kk$z) <- list(kk$x,kk$y)
        dc <- melt(kk$z)
        list("kde" = kk, "density" = dc, "threshold" = threshold)
}


res <- getLevel(my_data_fig2$WarDiplomacy, my_data_fig2$PeaceDiplomacy)
# Identify points that are in the low density regions of the plot.
kx <- cut(my_data_fig2$WarDiplomacy, res$kde$x, labels = FALSE, include.lowest = TRUE)
ky <- cut( my_data_fig2$PeaceDiplomacy, res$kde$y, labels = FALSE, include.lowest = TRUE)
kz <- sapply(seq_along(kx), function(i) res$kde$z[kx[i], ky[i]])
my_data_fig2$low <- kz < quantile(kz, 0.2)

## make sure GHA is on the figure
my_data_fig2 <- my_data_fig2 %>% 
        dplyr::mutate(low = ifelse(countrycode == "GNB", TRUE, low))

ggplot() +
        geom_point(
                data = my_data_fig2,
                mapping = aes(x = WarDiplomacy , y =PeaceDiplomacy)
        ) +
        geom_text_repel(
                data = my_data_fig2[my_data_fig2$low,],
                mapping = aes(x= WarDiplomacy, y = PeaceDiplomacy, label = countrycode)
        ) +
        labs(x= "The number of wartime involvements",
             y ="The number of post-conflict involvements") + 
        theme_bw() + theme(axis.text.x = element_text(angle = 45, hjust = 1),
                           axis.text = element_text(size=14), 
                           axis.title.x = element_text(size=15,vjust=-0.1),
                           axis.title.y = element_text(size=15,vjust=+1.1),
                           plot.background = element_blank(),
                           panel.grid.major = element_blank())

ggsave("figures/Figure_1b.jpeg", width = 8, height = 5)

#### end of the replication code
rm(list = ls())
